{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Double DQN to Play MountainCar-v0\n",
    "\n",
    "TensorFlow version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import nn\n",
    "from tensorflow import losses\n",
    "from tensorflow import optimizers\n",
    "from tensorflow import keras\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras import models\n",
    "\n",
    "# Send DEBUG-and-above records to stdout, so they show up in the cell\n",
    "# output, prefixed with an HH:MM:SS timestamp and the level name.\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:21:31 [INFO] env: <MountainCarEnv<MountainCar-v0>>\n",
      "22:21:31 [INFO] action_space: Discrete(3)\n",
      "22:21:31 [INFO] observation_space: Box(-1.2000000476837158, 0.6000000238418579, (2,), float32)\n",
      "22:21:31 [INFO] reward_range: (-inf, inf)\n",
      "22:21:31 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 30}\n",
      "22:21:31 [INFO] _max_episode_steps: 200\n",
      "22:21:31 [INFO] _elapsed_steps: None\n",
      "22:21:31 [INFO] id: MountainCar-v0\n",
      "22:21:31 [INFO] entry_point: gym.envs.classic_control:MountainCarEnv\n",
      "22:21:31 [INFO] reward_threshold: -110.0\n",
      "22:21:31 [INFO] nondeterministic: False\n",
      "22:21:31 [INFO] max_episode_steps: 200\n",
      "22:21:31 [INFO] _kwargs: {}\n",
      "22:21:31 [INFO] _env_name: MountainCar\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('MountainCar-v0')\n",
    "env.seed(0)\n",
    "# Log every attribute of the environment object, then of its spec.\n",
    "for described in (env, env.spec):\n",
    "    for key, value in vars(described).items():\n",
    "        logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    \"\"\"Fixed-capacity ring buffer of transitions for experience replay.\n",
    "\n",
    "    Transitions live in a pandas DataFrame with one row per slot and the\n",
    "    columns state, action, reward, next_state and done.  When the buffer\n",
    "    is full, the oldest transition is overwritten.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, capacity):\n",
    "        \"\"\"Allocate an empty buffer with `capacity` slots.\"\"\"\n",
    "        self.memory = pd.DataFrame(index=range(capacity),\n",
    "                columns=['state', 'action', 'reward', 'next_state', 'done'])\n",
    "        self.i = 0  # slot that the next store() call writes to\n",
    "        self.count = 0  # number of filled slots, at most `capacity`\n",
    "        self.capacity = capacity\n",
    "\n",
    "    def store(self, *args):\n",
    "        \"\"\"Write one transition, given in column order, into the next slot.\"\"\"\n",
    "        # Build the row as a 1-D object array with one element per field.\n",
    "        # Passing the raw tuple leaves the coercion of mixed array/scalar\n",
    "        # fields to pandas/NumPy, which may reject such ragged input on\n",
    "        # recent NumPy versions.\n",
    "        row = np.empty(len(args), dtype=object)\n",
    "        for column, value in enumerate(args):\n",
    "            row[column] = value\n",
    "        self.memory.loc[self.i] = row\n",
    "        self.i = (self.i + 1) % self.capacity\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        \"\"\"Draw `size` stored transitions uniformly, with replacement.\n",
    "\n",
    "        Returns a tuple holding one stacked NumPy array per column, in\n",
    "        column order.  Raises ValueError if nothing has been stored yet.\n",
    "        \"\"\"\n",
    "        if self.count == 0:\n",
    "            raise ValueError('cannot sample from an empty replayer')\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        # Materialize eagerly: a tuple can be unpacked, indexed and reused,\n",
    "        # whereas a generator can only be consumed once.\n",
    "        return tuple(np.stack(self.memory.loc[indices, field])\n",
    "                for field in self.memory.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DoubleDQNAgent:\n",
    "    \"\"\"Double DQN agent for environments with a discrete action space.\n",
    "\n",
    "    The evaluation network selects the greedy next action and the target\n",
    "    network scores it.  The target network is re-synchronized with the\n",
    "    evaluation network each time reset(mode='train') is called.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env, gamma=0.99, epsilon=0.001,\n",
    "            replayer_capacity=10000, batch_size=1024, learning_rate=0.001):\n",
    "        \"\"\"Build the replay buffer and both Q-networks for `env`.\n",
    "\n",
    "        The keyword arguments default to the previously hard-coded values:\n",
    "        discount factor, exploration probability in train mode, replay\n",
    "        buffer size, replay batch size and Adam learning rate.\n",
    "        \"\"\"\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = gamma\n",
    "        self.epsilon = epsilon\n",
    "        self.batch_size = batch_size\n",
    "\n",
    "        # Defined up front so that step() also works before reset().\n",
    "        self.mode = None\n",
    "        self.trajectory = []\n",
    "\n",
    "        self.replayer = DQNReplayer(replayer_capacity)\n",
    "\n",
    "        self.evaluate_net = self.build_net(\n",
    "                input_size=env.observation_space.shape[0],\n",
    "                hidden_sizes=[64, 64], output_size=self.action_n,\n",
    "                learning_rate=learning_rate)\n",
    "        # clone_model creates fresh weights; the evaluation weights are\n",
    "        # copied over in reset(mode='train').\n",
    "        self.target_net = models.clone_model(self.evaluate_net)\n",
    "\n",
    "    def build_net(self, input_size, hidden_sizes, output_size,\n",
    "            learning_rate=0.001):\n",
    "        \"\"\"Return a compiled MLP with `output_size` linear outputs.\"\"\"\n",
    "        model = keras.Sequential()\n",
    "        for layer, hidden_size in enumerate(hidden_sizes):\n",
    "            # Only the first layer declares the input shape.\n",
    "            kwargs = dict(input_shape=(input_size,)) if not layer else {}\n",
    "            model.add(layers.Dense(units=hidden_size,\n",
    "                    activation=nn.relu, **kwargs))\n",
    "        model.add(layers.Dense(units=output_size))\n",
    "        # `learning_rate` replaces the deprecated `lr` alias.\n",
    "        optimizer = optimizers.Adam(learning_rate=learning_rate)\n",
    "        model.compile(loss=losses.mse, optimizer=optimizer)\n",
    "        return model\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        \"\"\"Start a new episode in the given mode.\n",
    "\n",
    "        In 'train' mode the trajectory is cleared and the target network\n",
    "        receives a copy of the evaluation network's weights.\n",
    "        \"\"\"\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "            self.target_net.set_weights(self.evaluate_net.get_weights())\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        \"\"\"Choose an action for `observation`.\n",
    "\n",
    "        In train mode the call also stores the transition that led to this\n",
    "        observation and, once the replay buffer is at least 95% full, runs\n",
    "        one learning update.\n",
    "        \"\"\"\n",
    "        if self.mode == 'train' and np.random.rand() < self.epsilon:\n",
    "            # epsilon-greedy policy in train mode\n",
    "            action = np.random.randint(self.action_n)\n",
    "        else:\n",
    "            qs = self.evaluate_net.predict(observation[np.newaxis], verbose=0)\n",
    "            action = np.argmax(qs)\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                # The last two (observation, reward, done, action) records\n",
    "                # form one transition: the previous state and action, then\n",
    "                # the current reward, observation and done flag.\n",
    "                state, _, _, act, next_state, reward, done, _ = \\\n",
    "                        self.trajectory[-8:]\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "            if self.replayer.count >= self.replayer.capacity * 0.95:\n",
    "                # skip first few episodes for speed\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"Nothing to release.\"\"\"\n",
    "        pass\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"Fit the evaluation network on one replayed batch.\"\"\"\n",
    "        # replay\n",
    "        states, actions, rewards, next_states, dones = \\\n",
    "                self.replayer.sample(self.batch_size)\n",
    "\n",
    "        # Double DQN target: the evaluation net picks the next action and\n",
    "        # the target net supplies that action's value.\n",
    "        next_eval_qs = self.evaluate_net.predict(next_states, verbose=0)\n",
    "        next_actions = next_eval_qs.argmax(axis=-1)\n",
    "        next_qs = self.target_net.predict(next_states, verbose=0)\n",
    "        next_max_qs = next_qs[np.arange(next_qs.shape[0]), next_actions]\n",
    "        us = rewards + self.gamma * next_max_qs * (1. - dones)\n",
    "        # Only the taken action's Q-value gets a new target; the remaining\n",
    "        # outputs keep their current prediction.\n",
    "        targets = self.evaluate_net.predict(states, verbose=0)\n",
    "        targets[np.arange(us.shape[0]), actions] = us\n",
    "        self.evaluate_net.fit(states, targets, verbose=0)\n",
    "\n",
    "\n",
    "# Build the agent for the `env` created in an earlier cell.\n",
    "agent = DoubleDQNAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:21:32 [INFO] ==== train ====\n",
      "22:21:40 [DEBUG] train episode 0: reward = -200.00, steps = 200\n",
      "22:21:51 [DEBUG] train episode 1: reward = -200.00, steps = 200\n",
      "22:22:00 [DEBUG] train episode 2: reward = -200.00, steps = 200\n",
      "22:22:12 [DEBUG] train episode 3: reward = -200.00, steps = 200\n",
      "22:22:24 [DEBUG] train episode 4: reward = -200.00, steps = 200\n",
      "22:22:36 [DEBUG] train episode 5: reward = -200.00, steps = 200\n",
      "22:22:50 [DEBUG] train episode 6: reward = -200.00, steps = 200\n",
      "22:23:04 [DEBUG] train episode 7: reward = -200.00, steps = 200\n",
      "22:23:18 [DEBUG] train episode 8: reward = -200.00, steps = 200\n",
      "22:23:33 [DEBUG] train episode 9: reward = -200.00, steps = 200\n",
      "22:23:54 [DEBUG] train episode 10: reward = -200.00, steps = 200\n",
      "22:24:16 [DEBUG] train episode 11: reward = -200.00, steps = 200\n",
      "22:24:30 [DEBUG] train episode 12: reward = -200.00, steps = 200\n",
      "22:24:52 [DEBUG] train episode 13: reward = -200.00, steps = 200\n",
      "22:25:16 [DEBUG] train episode 14: reward = -200.00, steps = 200\n",
      "22:25:32 [DEBUG] train episode 15: reward = -200.00, steps = 200\n",
      "22:25:45 [DEBUG] train episode 16: reward = -200.00, steps = 200\n",
      "22:25:59 [DEBUG] train episode 17: reward = -200.00, steps = 200\n",
      "22:26:12 [DEBUG] train episode 18: reward = -200.00, steps = 200\n",
      "22:26:25 [DEBUG] train episode 19: reward = -200.00, steps = 200\n",
      "22:26:39 [DEBUG] train episode 20: reward = -200.00, steps = 200\n",
      "22:26:54 [DEBUG] train episode 21: reward = -200.00, steps = 200\n",
      "22:27:08 [DEBUG] train episode 22: reward = -200.00, steps = 200\n",
      "22:27:21 [DEBUG] train episode 23: reward = -200.00, steps = 200\n",
      "22:27:36 [DEBUG] train episode 24: reward = -200.00, steps = 200\n",
      "22:27:51 [DEBUG] train episode 25: reward = -200.00, steps = 200\n",
      "22:28:06 [DEBUG] train episode 26: reward = -200.00, steps = 200\n",
      "22:28:20 [DEBUG] train episode 27: reward = -200.00, steps = 200\n",
      "22:28:34 [DEBUG] train episode 28: reward = -200.00, steps = 200\n",
      "22:28:49 [DEBUG] train episode 29: reward = -200.00, steps = 200\n",
      "22:29:03 [DEBUG] train episode 30: reward = -200.00, steps = 200\n",
      "22:29:18 [DEBUG] train episode 31: reward = -200.00, steps = 200\n",
      "22:29:32 [DEBUG] train episode 32: reward = -200.00, steps = 200\n",
      "22:29:44 [DEBUG] train episode 33: reward = -200.00, steps = 200\n",
      "22:29:57 [DEBUG] train episode 34: reward = -200.00, steps = 200\n",
      "22:30:10 [DEBUG] train episode 35: reward = -200.00, steps = 200\n",
      "22:30:24 [DEBUG] train episode 36: reward = -200.00, steps = 200\n",
      "22:30:44 [DEBUG] train episode 37: reward = -200.00, steps = 200\n",
      "22:31:03 [DEBUG] train episode 38: reward = -200.00, steps = 200\n",
      "22:31:16 [DEBUG] train episode 39: reward = -200.00, steps = 200\n",
      "22:31:31 [DEBUG] train episode 40: reward = -200.00, steps = 200\n",
      "22:31:46 [DEBUG] train episode 41: reward = -200.00, steps = 200\n",
      "22:32:00 [DEBUG] train episode 42: reward = -200.00, steps = 200\n",
      "22:32:16 [DEBUG] train episode 43: reward = -200.00, steps = 200\n",
      "22:32:30 [DEBUG] train episode 44: reward = -200.00, steps = 200\n",
      "22:32:44 [DEBUG] train episode 45: reward = -200.00, steps = 200\n",
      "22:33:01 [DEBUG] train episode 46: reward = -200.00, steps = 200\n",
      "22:33:36 [DEBUG] train episode 47: reward = -200.00, steps = 200\n",
      "22:35:04 [DEBUG] train episode 48: reward = -200.00, steps = 200\n",
      "22:36:48 [DEBUG] train episode 49: reward = -200.00, steps = 200\n",
      "22:38:43 [DEBUG] train episode 50: reward = -200.00, steps = 200\n",
      "22:40:51 [DEBUG] train episode 51: reward = -200.00, steps = 200\n",
      "22:43:12 [DEBUG] train episode 52: reward = -200.00, steps = 200\n",
      "22:45:32 [DEBUG] train episode 53: reward = -200.00, steps = 200\n",
      "22:47:46 [DEBUG] train episode 54: reward = -200.00, steps = 200\n",
      "22:49:48 [DEBUG] train episode 55: reward = -200.00, steps = 200\n",
      "22:52:03 [DEBUG] train episode 56: reward = -200.00, steps = 200\n",
      "22:54:16 [DEBUG] train episode 57: reward = -200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 58: reward = -200.00, steps = 200\n",
      "22:58:36 [DEBUG] train episode 59: reward = -200.00, steps = 200\n",
      "23:00:55 [DEBUG] train episode 60: reward = -200.00, steps = 200\n",
      "23:03:22 [DEBUG] train episode 61: reward = -200.00, steps = 200\n",
      "23:05:41 [DEBUG] train episode 62: reward = -200.00, steps = 200\n",
      "23:07:57 [DEBUG] train episode 63: reward = -200.00, steps = 200\n",
      "23:10:15 [DEBUG] train episode 64: reward = -200.00, steps = 200\n",
      "23:12:59 [DEBUG] train episode 65: reward = -200.00, steps = 200\n",
      "23:15:58 [DEBUG] train episode 66: reward = -200.00, steps = 200\n",
      "23:18:54 [DEBUG] train episode 67: reward = -200.00, steps = 200\n",
      "23:21:51 [DEBUG] train episode 68: reward = -200.00, steps = 200\n",
      "23:24:52 [DEBUG] train episode 69: reward = -200.00, steps = 200\n",
      "23:27:53 [DEBUG] train episode 70: reward = -200.00, steps = 200\n",
      "23:30:54 [DEBUG] train episode 71: reward = -200.00, steps = 200\n",
      "23:33:49 [DEBUG] train episode 72: reward = -200.00, steps = 200\n",
      "23:36:48 [DEBUG] train episode 73: reward = -200.00, steps = 200\n",
      "23:39:48 [DEBUG] train episode 74: reward = -200.00, steps = 200\n",
      "23:42:58 [DEBUG] train episode 75: reward = -200.00, steps = 200\n",
      "23:46:10 [DEBUG] train episode 76: reward = -200.00, steps = 200\n",
      "23:49:28 [DEBUG] train episode 77: reward = -200.00, steps = 200\n",
      "23:52:42 [DEBUG] train episode 78: reward = -200.00, steps = 200\n",
      "23:55:46 [DEBUG] train episode 79: reward = -200.00, steps = 200\n",
      "23:58:41 [DEBUG] train episode 80: reward = -200.00, steps = 200\n",
      "00:01:36 [DEBUG] train episode 81: reward = -200.00, steps = 200\n",
      "00:04:32 [DEBUG] train episode 82: reward = -200.00, steps = 200\n",
      "00:07:24 [DEBUG] train episode 83: reward = -200.00, steps = 200\n",
      "00:10:21 [DEBUG] train episode 84: reward = -200.00, steps = 200\n",
      "00:13:15 [DEBUG] train episode 85: reward = -200.00, steps = 200\n",
      "00:16:10 [DEBUG] train episode 86: reward = -200.00, steps = 200\n",
      "00:19:04 [DEBUG] train episode 87: reward = -200.00, steps = 200\n",
      "00:21:58 [DEBUG] train episode 88: reward = -200.00, steps = 200\n",
      "00:24:52 [DEBUG] train episode 89: reward = -200.00, steps = 200\n",
      "00:27:49 [DEBUG] train episode 90: reward = -200.00, steps = 200\n",
      "00:30:42 [DEBUG] train episode 91: reward = -200.00, steps = 200\n",
      "00:33:43 [DEBUG] train episode 92: reward = -200.00, steps = 200\n",
      "00:36:36 [DEBUG] train episode 93: reward = -200.00, steps = 200\n",
      "00:39:30 [DEBUG] train episode 94: reward = -200.00, steps = 200\n",
      "00:42:25 [DEBUG] train episode 95: reward = -200.00, steps = 200\n",
      "00:45:19 [DEBUG] train episode 96: reward = -200.00, steps = 200\n",
      "00:48:11 [DEBUG] train episode 97: reward = -200.00, steps = 200\n",
      "00:51:04 [DEBUG] train episode 98: reward = -200.00, steps = 200\n",
      "00:53:57 [DEBUG] train episode 99: reward = -200.00, steps = 200\n",
      "00:56:51 [DEBUG] train episode 100: reward = -200.00, steps = 200\n",
      "00:59:47 [DEBUG] train episode 101: reward = -200.00, steps = 200\n",
      "01:02:39 [DEBUG] train episode 102: reward = -200.00, steps = 200\n",
      "01:05:33 [DEBUG] train episode 103: reward = -200.00, steps = 200\n",
      "01:08:27 [DEBUG] train episode 104: reward = -200.00, steps = 200\n",
      "01:11:20 [DEBUG] train episode 105: reward = -200.00, steps = 200\n",
      "01:14:14 [DEBUG] train episode 106: reward = -200.00, steps = 200\n",
      "01:17:06 [DEBUG] train episode 107: reward = -200.00, steps = 200\n",
      "01:20:00 [DEBUG] train episode 108: reward = -200.00, steps = 200\n",
      "01:22:54 [DEBUG] train episode 109: reward = -200.00, steps = 200\n",
      "01:25:48 [DEBUG] train episode 110: reward = -200.00, steps = 200\n",
      "01:28:43 [DEBUG] train episode 111: reward = -200.00, steps = 200\n",
      "01:31:37 [DEBUG] train episode 112: reward = -200.00, steps = 200\n",
      "01:34:31 [DEBUG] train episode 113: reward = -200.00, steps = 200\n",
      "01:37:24 [DEBUG] train episode 114: reward = -200.00, steps = 200\n",
      "01:40:18 [DEBUG] train episode 115: reward = -200.00, steps = 200\n",
      "01:43:13 [DEBUG] train episode 116: reward = -200.00, steps = 200\n",
      "01:46:08 [DEBUG] train episode 117: reward = -200.00, steps = 200\n",
      "01:49:01 [DEBUG] train episode 118: reward = -200.00, steps = 200\n",
      "01:51:53 [DEBUG] train episode 119: reward = -200.00, steps = 200\n",
      "01:54:48 [DEBUG] train episode 120: reward = -200.00, steps = 200\n",
      "01:57:41 [DEBUG] train episode 121: reward = -200.00, steps = 200\n",
      "02:00:34 [DEBUG] train episode 122: reward = -200.00, steps = 200\n",
      "02:03:26 [DEBUG] train episode 123: reward = -200.00, steps = 200\n",
      "02:06:18 [DEBUG] train episode 124: reward = -200.00, steps = 200\n",
      "02:09:10 [DEBUG] train episode 125: reward = -200.00, steps = 200\n",
      "02:12:03 [DEBUG] train episode 126: reward = -200.00, steps = 200\n",
      "02:14:57 [DEBUG] train episode 127: reward = -200.00, steps = 200\n",
      "02:17:50 [DEBUG] train episode 128: reward = -200.00, steps = 200\n",
      "02:20:42 [DEBUG] train episode 129: reward = -200.00, steps = 200\n",
      "02:23:36 [DEBUG] train episode 130: reward = -200.00, steps = 200\n",
      "02:26:31 [DEBUG] train episode 131: reward = -200.00, steps = 200\n",
      "02:29:25 [DEBUG] train episode 132: reward = -200.00, steps = 200\n",
      "02:32:18 [DEBUG] train episode 133: reward = -200.00, steps = 200\n",
      "02:35:10 [DEBUG] train episode 134: reward = -200.00, steps = 200\n",
      "02:38:03 [DEBUG] train episode 135: reward = -200.00, steps = 200\n",
      "02:40:56 [DEBUG] train episode 136: reward = -200.00, steps = 200\n",
      "02:43:48 [DEBUG] train episode 137: reward = -200.00, steps = 200\n",
      "02:46:30 [DEBUG] train episode 138: reward = -186.00, steps = 186\n",
      "02:49:23 [DEBUG] train episode 139: reward = -200.00, steps = 200\n",
      "02:51:56 [DEBUG] train episode 140: reward = -174.00, steps = 174\n",
      "02:54:51 [DEBUG] train episode 141: reward = -200.00, steps = 200\n",
      "02:57:34 [DEBUG] train episode 142: reward = -200.00, steps = 200\n",
      "03:00:08 [DEBUG] train episode 143: reward = -200.00, steps = 200\n",
      "03:02:42 [DEBUG] train episode 144: reward = -200.00, steps = 200\n",
      "03:05:17 [DEBUG] train episode 145: reward = -200.00, steps = 200\n",
      "03:07:51 [DEBUG] train episode 146: reward = -200.00, steps = 200\n",
      "03:10:25 [DEBUG] train episode 147: reward = -200.00, steps = 200\n",
      "03:12:59 [DEBUG] train episode 148: reward = -200.00, steps = 200\n",
      "03:15:33 [DEBUG] train episode 149: reward = -200.00, steps = 200\n",
      "03:18:07 [DEBUG] train episode 150: reward = -200.00, steps = 200\n",
      "03:20:38 [DEBUG] train episode 151: reward = -200.00, steps = 200\n",
      "03:23:08 [DEBUG] train episode 152: reward = -200.00, steps = 200\n",
      "03:25:37 [DEBUG] train episode 153: reward = -200.00, steps = 200\n",
      "03:28:08 [DEBUG] train episode 154: reward = -200.00, steps = 200\n",
      "03:30:39 [DEBUG] train episode 155: reward = -200.00, steps = 200\n",
      "03:33:10 [DEBUG] train episode 156: reward = -200.00, steps = 200\n",
      "03:35:39 [DEBUG] train episode 157: reward = -200.00, steps = 200\n",
      "03:38:09 [DEBUG] train episode 158: reward = -200.00, steps = 200\n",
      "03:40:40 [DEBUG] train episode 159: reward = -200.00, steps = 200\n",
      "03:43:10 [DEBUG] train episode 160: reward = -200.00, steps = 200\n",
      "03:45:38 [DEBUG] train episode 161: reward = -200.00, steps = 200\n",
      "03:48:07 [DEBUG] train episode 162: reward = -200.00, steps = 200\n",
      "03:50:38 [DEBUG] train episode 163: reward = -200.00, steps = 200\n",
      "03:53:09 [DEBUG] train episode 164: reward = -200.00, steps = 200\n",
      "03:55:40 [DEBUG] train episode 165: reward = -200.00, steps = 200\n",
      "03:58:10 [DEBUG] train episode 166: reward = -200.00, steps = 200\n",
      "03:59:26 [DEBUG] train episode 167: reward = -101.00, steps = 101\n",
      "04:01:31 [DEBUG] train episode 168: reward = -165.00, steps = 165\n",
      "04:04:00 [DEBUG] train episode 169: reward = -200.00, steps = 200\n",
      "04:06:02 [DEBUG] train episode 170: reward = -165.00, steps = 165\n",
      "04:07:38 [DEBUG] train episode 171: reward = -129.00, steps = 129\n",
      "04:09:05 [DEBUG] train episode 172: reward = -113.00, steps = 113\n",
      "04:11:11 [DEBUG] train episode 173: reward = -156.00, steps = 156\n",
      "04:13:19 [DEBUG] train episode 174: reward = -172.00, steps = 172\n",
      "04:14:54 [DEBUG] train episode 175: reward = -126.00, steps = 126\n",
      "04:17:22 [DEBUG] train episode 176: reward = -200.00, steps = 200\n",
      "04:19:44 [DEBUG] train episode 177: reward = -200.00, steps = 200\n",
      "04:21:57 [DEBUG] train episode 178: reward = -200.00, steps = 200\n",
      "04:23:35 [DEBUG] train episode 179: reward = -145.00, steps = 145\n",
      "04:25:47 [DEBUG] train episode 180: reward = -200.00, steps = 200\n",
      "04:27:23 [DEBUG] train episode 181: reward = -141.00, steps = 141\n",
      "04:29:35 [DEBUG] train episode 182: reward = -200.00, steps = 200\n",
      "04:31:46 [DEBUG] train episode 183: reward = -200.00, steps = 200\n",
      "04:33:46 [DEBUG] train episode 184: reward = -200.00, steps = 200\n",
      "04:35:45 [DEBUG] train episode 185: reward = -200.00, steps = 200\n",
      "04:37:41 [DEBUG] train episode 186: reward = -200.00, steps = 200\n",
      "04:39:35 [DEBUG] train episode 187: reward = -200.00, steps = 200\n",
      "04:41:29 [DEBUG] train episode 188: reward = -200.00, steps = 200\n",
      "04:43:23 [DEBUG] train episode 189: reward = -200.00, steps = 200\n",
      "04:45:09 [DEBUG] train episode 190: reward = -185.00, steps = 185\n",
      "04:47:03 [DEBUG] train episode 191: reward = -200.00, steps = 200\n",
      "04:48:58 [DEBUG] train episode 192: reward = -200.00, steps = 200\n",
      "04:50:49 [DEBUG] train episode 193: reward = -200.00, steps = 200\n",
      "04:52:37 [DEBUG] train episode 194: reward = -200.00, steps = 200\n",
      "04:54:24 [DEBUG] train episode 195: reward = -200.00, steps = 200\n",
      "04:56:07 [DEBUG] train episode 196: reward = -200.00, steps = 200\n",
      "04:57:50 [DEBUG] train episode 197: reward = -200.00, steps = 200\n",
      "04:59:29 [DEBUG] train episode 198: reward = -192.00, steps = 192\n",
      "05:01:06 [DEBUG] train episode 199: reward = -162.00, steps = 162\n",
      "05:02:49 [DEBUG] train episode 200: reward = -200.00, steps = 200\n",
      "05:03:41 [DEBUG] train episode 201: reward = -93.00, steps = 93\n",
      "05:04:34 [DEBUG] train episode 202: reward = -102.00, steps = 102\n",
      "05:06:19 [DEBUG] train episode 203: reward = -200.00, steps = 200\n",
      "05:08:09 [DEBUG] train episode 204: reward = -200.00, steps = 200\n",
      "05:09:54 [DEBUG] train episode 205: reward = -189.00, steps = 189\n",
      "05:11:17 [DEBUG] train episode 206: reward = -149.00, steps = 149\n",
      "05:13:03 [DEBUG] train episode 207: reward = -200.00, steps = 200\n",
      "05:14:46 [DEBUG] train episode 208: reward = -200.00, steps = 200\n",
      "05:16:30 [DEBUG] train episode 209: reward = -200.00, steps = 200\n",
      "05:18:19 [DEBUG] train episode 210: reward = -200.00, steps = 200\n",
      "05:19:05 [DEBUG] train episode 211: reward = -84.00, steps = 84\n",
      "05:20:48 [DEBUG] train episode 212: reward = -200.00, steps = 200\n",
      "05:21:33 [DEBUG] train episode 213: reward = -86.00, steps = 86\n",
      "05:22:18 [DEBUG] train episode 214: reward = -86.00, steps = 86\n",
      "05:23:27 [DEBUG] train episode 215: reward = -133.00, steps = 133\n",
      "05:24:16 [DEBUG] train episode 216: reward = -94.00, steps = 94\n",
      "05:25:02 [DEBUG] train episode 217: reward = -89.00, steps = 89\n",
      "05:25:46 [DEBUG] train episode 218: reward = -84.00, steps = 84\n",
      "05:26:41 [DEBUG] train episode 219: reward = -105.00, steps = 105\n",
      "05:28:27 [DEBUG] train episode 220: reward = -200.00, steps = 200\n",
      "05:29:30 [DEBUG] train episode 221: reward = -120.00, steps = 120\n",
      "05:30:24 [DEBUG] train episode 222: reward = -106.00, steps = 106\n",
      "05:31:13 [DEBUG] train episode 223: reward = -94.00, steps = 94\n",
      "05:31:59 [DEBUG] train episode 224: reward = -86.00, steps = 86\n",
      "05:32:52 [DEBUG] train episode 225: reward = -100.00, steps = 100\n",
      "05:32:52 [INFO] ==== test ====\n",
      "05:33:02 [DEBUG] test episode 0: reward = -109.00, steps = 109\n",
      "05:33:11 [DEBUG] test episode 1: reward = -104.00, steps = 104\n",
      "05:33:20 [DEBUG] test episode 2: reward = -109.00, steps = 109\n",
      "05:33:30 [DEBUG] test episode 3: reward = -109.00, steps = 109\n",
      "05:33:39 [DEBUG] test episode 4: reward = -110.00, steps = 110\n",
      "05:33:52 [DEBUG] test episode 5: reward = -150.00, steps = 150\n",
      "05:34:00 [DEBUG] test episode 6: reward = -86.00, steps = 86\n",
      "05:34:09 [DEBUG] test episode 7: reward = -107.00, steps = 107\n",
      "05:34:18 [DEBUG] test episode 8: reward = -104.00, steps = 104\n",
      "05:34:27 [DEBUG] test episode 9: reward = -104.00, steps = 104\n",
      "05:34:35 [DEBUG] test episode 10: reward = -88.00, steps = 88\n",
      "05:34:44 [DEBUG] test episode 11: reward = -104.00, steps = 104\n",
      "05:34:53 [DEBUG] test episode 12: reward = -105.00, steps = 105\n",
      "05:35:01 [DEBUG] test episode 13: reward = -104.00, steps = 104\n",
      "05:35:09 [DEBUG] test episode 14: reward = -87.00, steps = 87\n",
      "05:35:18 [DEBUG] test episode 15: reward = -106.00, steps = 106\n",
      "05:35:28 [DEBUG] test episode 16: reward = -109.00, steps = 109\n",
      "05:35:37 [DEBUG] test episode 17: reward = -104.00, steps = 104\n",
      "05:35:46 [DEBUG] test episode 18: reward = -106.00, steps = 106\n",
      "05:35:55 [DEBUG] test episode 19: reward = -102.00, steps = 102\n",
      "05:36:02 [DEBUG] test episode 20: reward = -87.00, steps = 87\n",
      "05:36:12 [DEBUG] test episode 21: reward = -107.00, steps = 107\n",
      "05:36:21 [DEBUG] test episode 22: reward = -103.00, steps = 103\n",
      "05:36:30 [DEBUG] test episode 23: reward = -104.00, steps = 104\n",
      "05:36:39 [DEBUG] test episode 24: reward = -107.00, steps = 107\n",
      "05:36:48 [DEBUG] test episode 25: reward = -103.00, steps = 103\n",
      "05:36:57 [DEBUG] test episode 26: reward = -105.00, steps = 105\n",
      "05:37:05 [DEBUG] test episode 27: reward = -99.00, steps = 99\n",
      "05:37:15 [DEBUG] test episode 28: reward = -105.00, steps = 105\n",
      "05:37:24 [DEBUG] test episode 29: reward = -104.00, steps = 104\n",
      "05:37:32 [DEBUG] test episode 30: reward = -102.00, steps = 102\n",
      "05:37:41 [DEBUG] test episode 31: reward = -103.00, steps = 103\n",
      "05:37:51 [DEBUG] test episode 32: reward = -107.00, steps = 107\n",
      "05:38:00 [DEBUG] test episode 33: reward = -110.00, steps = 110\n",
      "05:38:09 [DEBUG] test episode 34: reward = -104.00, steps = 104\n",
      "05:38:18 [DEBUG] test episode 35: reward = -108.00, steps = 108\n",
      "05:38:26 [DEBUG] test episode 36: reward = -87.00, steps = 87\n",
      "05:38:35 [DEBUG] test episode 37: reward = -105.00, steps = 105\n",
      "05:38:44 [DEBUG] test episode 38: reward = -104.00, steps = 104\n",
      "05:38:52 [DEBUG] test episode 39: reward = -91.00, steps = 91\n",
      "05:39:00 [DEBUG] test episode 40: reward = -101.00, steps = 101\n",
      "05:39:08 [DEBUG] test episode 41: reward = -87.00, steps = 87\n",
      "05:39:15 [DEBUG] test episode 42: reward = -86.00, steps = 86\n",
      "05:39:24 [DEBUG] test episode 43: reward = -105.00, steps = 105\n",
      "05:39:32 [DEBUG] test episode 44: reward = -86.00, steps = 86\n",
      "05:39:41 [DEBUG] test episode 45: reward = -106.00, steps = 106\n",
      "05:39:50 [DEBUG] test episode 46: reward = -105.00, steps = 105\n",
      "05:39:59 [DEBUG] test episode 47: reward = -106.00, steps = 106\n",
      "05:40:07 [DEBUG] test episode 48: reward = -86.00, steps = 86\n",
      "05:40:15 [DEBUG] test episode 49: reward = -99.00, steps = 99\n",
      "05:40:24 [DEBUG] test episode 50: reward = -107.00, steps = 107\n",
      "05:40:32 [DEBUG] test episode 51: reward = -88.00, steps = 88\n",
      "05:40:39 [DEBUG] test episode 52: reward = -87.00, steps = 87\n",
      "05:40:48 [DEBUG] test episode 53: reward = -104.00, steps = 104\n",
      "05:40:58 [DEBUG] test episode 54: reward = -106.00, steps = 106\n",
      "05:41:07 [DEBUG] test episode 55: reward = -104.00, steps = 104\n",
      "05:41:16 [DEBUG] test episode 56: reward = -106.00, steps = 106\n",
      "05:41:25 [DEBUG] test episode 57: reward = -109.00, steps = 109\n",
      "05:41:32 [DEBUG] test episode 58: reward = -86.00, steps = 86\n",
      "05:41:42 [DEBUG] test episode 59: reward = -106.00, steps = 106\n",
      "05:41:51 [DEBUG] test episode 60: reward = -105.00, steps = 105\n",
      "05:42:00 [DEBUG] test episode 61: reward = -104.00, steps = 104\n",
      "05:42:09 [DEBUG] test episode 62: reward = -105.00, steps = 105\n",
      "05:42:18 [DEBUG] test episode 63: reward = -108.00, steps = 108\n",
      "05:42:27 [DEBUG] test episode 64: reward = -103.00, steps = 103\n",
      "05:42:34 [DEBUG] test episode 65: reward = -87.00, steps = 87\n",
      "05:42:43 [DEBUG] test episode 66: reward = -104.00, steps = 104\n",
      "05:42:52 [DEBUG] test episode 67: reward = -106.00, steps = 106\n",
      "05:43:00 [DEBUG] test episode 68: reward = -89.00, steps = 89\n",
      "05:43:09 [DEBUG] test episode 69: reward = -107.00, steps = 107\n",
      "05:43:18 [DEBUG] test episode 70: reward = -104.00, steps = 104\n",
      "05:43:26 [DEBUG] test episode 71: reward = -87.00, steps = 87\n",
      "05:43:33 [DEBUG] test episode 72: reward = -85.00, steps = 85\n",
      "05:43:41 [DEBUG] test episode 73: reward = -87.00, steps = 87\n",
      "05:43:48 [DEBUG] test episode 74: reward = -85.00, steps = 85\n",
      "05:43:57 [DEBUG] test episode 75: reward = -104.00, steps = 104\n",
      "05:44:06 [DEBUG] test episode 76: reward = -106.00, steps = 106\n",
      "05:44:15 [DEBUG] test episode 77: reward = -102.00, steps = 102\n",
      "05:44:24 [DEBUG] test episode 78: reward = -104.00, steps = 104\n",
      "05:44:31 [DEBUG] test episode 79: reward = -85.00, steps = 85\n",
      "05:44:40 [DEBUG] test episode 80: reward = -105.00, steps = 105\n",
      "05:44:53 [DEBUG] test episode 81: reward = -150.00, steps = 150\n",
      "05:45:02 [DEBUG] test episode 82: reward = -106.00, steps = 106\n",
      "05:45:10 [DEBUG] test episode 83: reward = -85.00, steps = 85\n",
      "05:45:19 [DEBUG] test episode 84: reward = -107.00, steps = 107\n",
      "05:45:26 [DEBUG] test episode 85: reward = -87.00, steps = 87\n",
      "05:45:34 [DEBUG] test episode 86: reward = -90.00, steps = 90\n",
      "05:45:41 [DEBUG] test episode 87: reward = -86.00, steps = 86\n",
      "05:45:50 [DEBUG] test episode 88: reward = -103.00, steps = 103\n",
      "05:45:59 [DEBUG] test episode 89: reward = -107.00, steps = 107\n",
      "05:46:08 [DEBUG] test episode 90: reward = -107.00, steps = 107\n",
      "05:46:18 [DEBUG] test episode 91: reward = -105.00, steps = 105\n",
      "05:46:27 [DEBUG] test episode 92: reward = -105.00, steps = 105\n",
      "05:46:36 [DEBUG] test episode 93: reward = -108.00, steps = 108\n",
      "05:46:45 [DEBUG] test episode 94: reward = -106.00, steps = 106\n",
      "05:46:54 [DEBUG] test episode 95: reward = -108.00, steps = 108\n",
      "05:47:03 [DEBUG] test episode 96: reward = -106.00, steps = 106\n",
      "05:47:11 [DEBUG] test episode 97: reward = -87.00, steps = 87\n",
      "05:47:24 [DEBUG] test episode 98: reward = -150.00, steps = 150\n",
      "05:47:33 [DEBUG] test episode 99: reward = -102.00, steps = 102\n",
      "05:47:33 [INFO] average episode reward = -101.99 ± 11.75\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD5CAYAAADP2jUWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAzwElEQVR4nO2de7Qk11Xev11dffvO645k6zXSaKyngyUZjHURMgZjjDAKCbasIJbIWshJCGMUQYAVFqBohdgLDAkhsHCwlQiZ2IIE48RRZCLLwnIAG0dgjYwsaTQIjxC2xiM0es7ce2du3+6qnT9OnapT1VXdVbe7+nW/31qzqrse3ef29Nq1+9vf2UdUFYQQQrYW3qQHQAghZPww+BNCyBaEwZ8QQrYgDP6EELIFYfAnhJAtCIM/IYRsQfy6XlhE3gDgPwNYBNAF8C9U9YvRsVsB/AiAAMC/VNX7B73eGWecoRdccEFdwyWEkLnk4YcffkFVz8zury34A/hVAO9T1ftE5Pui528VkcsA3AjgcgDnAnhARF6rqkG/F7vgggtw4MCBGodLCCHzh4h8NW9/nbKPAliKHu8GcDR6/E4AH1PVtqo+DeAwgKtqHAchhJAMdWb+PwXgfhH5NZibzLdF+88D8OfOeUeifT2IyH4A+wFg3759tQ2UEEK2GkMFfxF5AMA5OYduA/DdAH5aVT8hIj8I4MMArgEgOefn9phQ1TsA3AEAy8vL7ENBCCEjYqjgr6rXFB0TkbsA/GT09H8AuDN6fATA+c6pe5FIQoQQQsZAnZr/UQDfGT1+G4CvRI8/CeBGEWmJyIUALgXwxRrHQQghJEOdmv+PAvhNEfEBrCPS7lX1oIh8HMATMBbQWwY5fQghhIyW2oK/qv4ZgCsLjr0fwPvrem9CCCH94QxfQggZIcdPdfCHXx5cxvzjvzqGZ146OYYR5cPgTwghI+RTjz2Ln/j9v8SLq+3Cc7pBiPf87sP4nS88PcaRpWHwJ4SQEdIJQgBANyx2p3/1pZPYCEKcbE+u3MngTwghIySMgn6oijBUHDuxjtV2N3XOU8dWAQCnOsXBPwwVdS6zy+BPCCEjxIZrVeC2//0Yrvrlz+KNv/gZvODIQIefN8F/vU/w/6cfeQjv+8MnahtnnVZPQgjZcli1J1TF3x1fBwBsdEO8vLaBM3a2AABPHVsD0D/zP3j0OII+0tGwMPMnhJARYqUa1eRGAACdIHkyKPPvBCFeWN3ASkYuGiUM/oQQMkJCdTR/R7O3Wbyqxpr/eifMfQ0rEa2sd2obJ4M/IYSMEBvvs7XabmgC/bGVdlwALpJ9njthgv/qOjN/QgiZCVzN3838rfXzcJT179m9iFMb+cH/2AlTK1hh8CeEkNkgkX2A0FF1upHm/1Sk919+7m60uwXBf8Vk/qc6AbpBvjQ0LAz+hBAyQpKCr8n8G55ZwiRwMv9dLR+vefX2gZk/gJ45AqOCwZ8QQkZIrPlHj5sNE/w70c+Aw8dWcdFZO7Gt2cCpTpA7kctm/kB90g+DPyGEjBBX81comg0TZgNH9rnkzJ3YttBAqGkLqOU5J/Nn8CeEkBkg1vxDcyNo+SbMdkPFifUOnjvRxiVn7Yz35zl+jq204+OUfQghZMyc3OjGjdrKohmfv+/Z4B/ib543M3svPnMHti00AORP9HruRBsXnbkTQH1efwZ/Qggp4LJfuB8/eteBSte4Ik6oQNNPCr7W5nlJpPkDvcG/G4R4ca2Ni8/cAYCZPyGETIQ/efL5Sue7M3xVFQuR5t8NTPBvNgT7XrUdi1Hwz8o+L65tQBW4OMr8T1DzJ4SQ6Scp+Fq3TyL7PHv8FPbs3ga/4cWZf9bu+Xzk9LnwjCjzZ/AnhJDpJ9vbZ8Ep+HaCMC7kLsayT7qmcPyU0fjPXlpEwxNq/oQQMhM4vX1CJ/MPQsVGV+E3bPA326zmfyIK/ru3NbFr0afmTwghk6JKX/3QmeGrqvEk
r26g6IYhFqLn1u2T1fxt5r97exM7Wz59/oQQMileWtsofa6r+YeqKc2/GySZf5Hb53gq828y+BNCyKRwl2AcRNrtk57k1QlC+FGvnyK3z/FTHTQ8wY6FBna1fGr+hBAyKV5cLZ/5a0rzTyZ5BYGiGya/BBYL3D7HT3Wwe1sTIjK9mr+I3CAiB0UkFJHlzLFbReSwiDwpIt/r7L9SRB6Ljn1ARGSYMRBCSB24DdeqZP6a0vwBP27sFmX+VvOPgn+72+v22b2tCQDYuTi9mv/jAK4H8Dl3p4hcBuBGAJcDuBbAh0SkER2+HcB+AJdG/64dcgyEEDJy3Gab1WSfZGtbOvueIAhDdIIk8282BJ7kZ/5LUfCf2sxfVQ+p6pM5h94J4GOq2lbVpwEcBnCViOwBsKSqD6q5Pd4F4LphxkAIIXUQpjL/KgVfR/MH4Img4Qm6oaIbhLH7R0Tits4uJ9zMv9XEynont+3zsNSl+Z8H4Bnn+ZFo33nR4+x+QgiZKlx354tVZB9nG6pCBPA9iayeSQ0AMHbPPLfPbifz7wTaIw2NAn/QCSLyAIBzcg7dpqr3FF2Ws0/77C967/0wEhH27ds3YKSEkGnlgSeew8Vn7YxbFswC4ZCaf6iKMAQEJvM3k7wSzR8AWn5v5m+CvwnN7/imc3Hla06PHUKjZGDwV9VrNvG6RwCc7zzfC+BotH9vzv6i974DwB0AsLy8PPrfPYSQsfCzn3gU7/imc/Hed1w+6aGUJq35V5B9Qnu9meTliZnl2w1DdMMQzT6Zv6rixHo3zvzPf9V2nP+q7cP9IQXUJft8EsCNItISkQthCrtfVNVnAayIyNWRy+cmAEW/Hgghc0InCCv3xZ80m838s4u5xJp/oOgGGrd4Bozjx+3ts7YRIAg1Dv51MqzV810icgTAmwDcKyL3A4CqHgTwcQBPAPg0gFtU1d7ebgZwJ0wR+CkA9w0zBkLI9GM977OEO968BVeKcDV/hcLzIs0/nuSVhN3Fppdy+7ize+tmoOzTD1W9G8DdBcfeD+D9OfsPALhimPclhMwWoWql/jjTgDvcKkNPd/UEAIHf8BCEGlk9k8x/sdlI+fiPnxxf8OcMX0JI7YRqnC6zhGuvrPKrJZnhm2j+vifoBEbzt719ACv79Gb+Swz+hJB5IFQgnLHgb4fre4IqilWS+fdq/p1A0fTSmX9e8GfmTwiZC1QVwWzF/jjzb3iyyczf3Ag8AfyGh42o4N3MZP6u1dP28l9aZPAnhMwBQagznflXCf7Zrp4ipr2DLey6sk/TF3Scu6Lby79uGPwJIbUTarUFUaaBdOZf5Tqztcs4ipjXsBm+W/BtSPrGcmK9A0+AnQtDeXFKweBPCKkVG0SDmbN6mq3f8Cr11klW8jL/PBE0GxJr++5sXRFJ/SI6caqDXYtNeDXM6M3C4E8IqRUb22Yt8w+HzPwVGmv+DU/i/jxNPwm7nqSLySvtLna26s/6AQZ/QkjN2CA6q8F/05p/mGT+vpdM5nLbO3iStpGurHexa5HBnxAyB7gF0FnCDrcRWT3LSj9hj+Yv8BuJ5u82dvMyvypWGfwJIfOCzrjsYzX6ssPXjOZvWzrHmr/j9pFs5t82mv84YPAnhNTK7Mo+ZtuIg3+58Se9fQo0f6eY62XcPqvr1PwJIXOCK4PMEknm76Wel73OLuPoiaS9/c5jY/VMrqXmTwiZG2wwnL3ePmZrM/+y9y73ZqdIJnlZUpp/j+zTxc4xBf/xvAshZMuiUbv6WZvha7V7G6xLyz5O5q9qli9sOMG/mdL8k2LyRhBioxuOpbUDwMyfEFIzseY/c7KP2TYqF3yj66MLPJGUvdPPaP72mtWotTM1f0LIXJAUfCc8kIr0un2qaf7dOPgDjYYr+6R9/vYa29efmj8hZC4IM5mwywurbVz+C5/Gl7728phHNRh3hi+QyFdlrwuixXw9L635L7jB3/lVsdpm5k8ImSP69fb5+sunsLYR4JmXTo57
WAOxw63u9jFb+0vH+Pwd2afh9vax1yhOrJuOnvT5E0Lmgn69fdaibLczhc3+s5l/aatqxtoqkFTAz3b1tOeuUvYhhMwT/SZ5rUX9bqxEMk1stuAba/6Bo/m7Vk8v3djNvjY1f0LIXNE3+M9Q5l++t09G8xdJzeotkn1WItmHmj8hZC7QjAziYouc3Sm0AmnG7VPWqhrLXFb2EaDhZPupgq8kxeS44MvMnxAyD9iMv1/mP42zf7MzfEv7/KNtUvBNa/79rJ4LvoeW3xhm2KVh8CeE1EoZzX8ag78dkp2RW3aGsvbIPihu7+AUk1faXSyNKesHGPwJITWTlUFc1qZY9unV/Ktd13Vm+KbaO3jp9g6A+WxWxtjRExgy+IvIDSJyUERCEVl29n+PiDwsIo9F27c5x66M9h8WkQ+I/esJIXOJznjBt/IM30wvo2zmn2f1NO0dxtfLHxg+838cwPUAPpfZ/wKA71fV1wN4N4DfdY7dDmA/gEujf9cOOQZCyBTTb4ZvXPCdQqtnr+ZfrZ9/UvAVNNw2zqnePohfe5ztnIEhg7+qHlLVJ3P2/6WqHo2eHgSwKCItEdkDYElVH1STDtwF4LphxkAImW76NXY7OdWa/3AredlfOiLJAi7NhsAVO1yf/+oYF28HxqP5/yMAf6mqbQDnATjiHDsS7SOEzCnuguZZEqvnNAZ/s7U2zao+/2SSV6L5uxO8AMfnH45f8x/4TiLyAIBzcg7dpqr3DLj2cgD/HsDb7a6c0wo/URHZDyMRYd++fYOGSgiZQmzMzJN2ZqPga5+Xvc5sA1fzj3R+1+ljjiWafxBqz/E6GRj8VfWazbywiOwFcDeAm1T1qWj3EQB7ndP2AjiavdZ57zsA3AEAy8vL05caEEIG4i5rqKop2WOaZR+Ng3+1xm7ZRnZmJS/zGu4ELwDw4huLIlBN1QPqphbZR0ROA3AvgFtV9Qt2v6o+C2BFRK6OXD43Aej764EQMtu4cT0b46da9ol+jFR1+2hP5p+0dC7K/ANVaLTe77gY1ur5LhE5AuBNAO4VkfujQz8O4BIA/0ZEHon+nRUduxnAnQAOA3gKwH3DjIEQMt24QTNr94ytnlPo9hnW5x8XfJ3XyGr+ieyjCMLxBv+hqguqejeMtJPd/0sAfqngmgMArhjmfQkhs4NbKHVvBGGoiewzhZm/HVH1lbzMNp7k5SWzhJsFmX+o5t/Myz6EEGJxk3038z/ZCXL3Twux5h8F7LJjVFh3U47bJ6v5Oz7/MFSMc8orgz8hpFbcyV1uYddKPgDQmUq3j9lW9flbBStd8LU+/6zVU+JrQtV4xu84YPAnhNRKquDrPFl1gv80un2SSV7VfP49k7yQZPy9sk/yXoFq3OhtHDD4E0JqxQ2a7izfk+1E9pnqzL9RdSUvs82f5FXs8w8Vs+P2IYSQQZTJ/G2WvN4JcP2HvoBHnnllXMMrRDNun/K9fdI+f7exW4/mHz0NIs1/jIk/gz8hpF7Cgszfav47W36cJb+w2saXvvYKDh49Pt5B5hBbPWVzbp+kt0+ymEux22dOJnkRQoglKPD5r22Y4L97WzP2+dvjZRdOqRNbuK3q889q/ibzt5p/vs8/DBWqwDg73DP4E0JqRYuCf6T5797WjDN/29d/CmJ/UvBtDJ/5D5rkZQvedPsQQuYGd/JukGP13L2tGQe/fuv9jpukn7/t7VP2ut7Mv1ko+6Dn3HHB4E8IqZXUrF7nsS34Lm3z466etvNn2Sy7TmzhdrMzfN3ePkWTvKzMY91OtHoSQuaG9Azf5HG7G2Kh4WHBb8SZfzeWfSYf/JN+/kn/nXLXRX9LdCMTV/PvsXqarb1RsOBLCJkbijT/ThCi2RA0PYkz30T+Ge8Y8+hZyavkmOyfa//UtNsna/W0mT9lH0LInJFu6ZwJ/r4HvyHxTcHKP9OY+Vft528z/7TPP9/qmZzLzJ8QMie4QbObyvwVzYYHv+HFme90FXzT7R2qzvC1vxRc
zb/X6mm27mzgccHgTwiplaJ+/p0gRNMzTc9s5tsJp0jzDzdr9ezV/G3QL2rv0KXmTwiZN3SQ7ON5COLMP5J9piDz37zsY7Z5bp+mX+DzDxKJaFww+BNCaqUo8+9Gsk+zIfEMXyt/BNOQ+ff09il3Xdzbx+3q6ZkbwKLfSJ1rVR77i2ecVs+hVvIihJBBFDV22wjCSPOXOOhPk9vH3n/8ylZPsw2cgC4i+O2brsQV5+5OnRuv4RuMv+DL4E8IqZXigq+xejY8D93QLGBuj5cNtHXSm/lX0/yzs3bf9g1n95xruz2wvQMhZO4o6uffiTJ/O/EpCDXWvqfC7RNtY7dPRZ+/Dej9mrU1JO3z5zKOhJC5oUj2MVZPiVsedMMk859OzX/wmPIWq+8n5Uim4Eu3DyFkbuhr9Wx4sabeCcKpauncq/kPviZvsfp+4Tz2+YeDbxSjhsGfEFIrfWf4RgVfwDh9khm+Yx1iLvYGVCXzTzexM9t+Ab1nhi8zf0LIvJDu7ZPs70697GO2VdbwzbtB9Evm2c+fEDK3uBJO16mabmQKvt0wTLp6TkHqv5llHPNO6a/5m2131hq7icgNInJQREIRWc45vk9EVkXkZ5x9V4rIYyJyWEQ+IONct4wQMnaCAbKPlVW6gfYs6jJJ7C8Wr4LPP++Uvpm/ly74zpLs8ziA6wF8ruD4bwC4L7PvdgD7AVwa/bt2yDEQQqaYItmn0zWyj+17Ywq+U6T5q8nEk0XWy1zTe1K/zD+2es5awVdVD6nqk3nHROQ6AH8D4KCzbw+AJVV9UM034i4A1w0zBkLIdJMqgmYkILfgG4TqrOE7+egfqpq+PJKMr8w1Wfol8/FiLoEtLlcf52ap5a1EZAeAnwPwvsyh8wAccZ4fifYRQuaUlP3RCY4b3azVU6eqpbPJ/AXi2edlgn/vvn7KdryMY9wBdIraO4jIAwDOyTl0m6reU3DZ+wD8hqquZv6YvL+s8BMVkf0wEhH27ds3aKiEkCmk2OcfuX086/YJ4yDYz+3z+NePY9+rt2NpsVnTiA0KhTiyT6kfI7kF3+LTs/38x+n2GRj8VfWaTbzutwL4ARH5VQCnAQhFZB3AJwDsdc7bC+Bon/e+A8AdALC8vDz5VIAQUhk3aOZO8mo4mX8wuLfPD/6XB3HLd12CW77rknoGHKFR5m8DdFWfv6VfNu9lJKWZb+ymqt9hH4vIewGsqupvRc9XRORqAH8B4CYA/6mOMRBCpgNX57dBzjZxMy2dvfjYILdPGCpObgRYbXdrHrV5r9EUfIvPt6/did0+1ce5WYa1er5LRI4AeBOAe0Xk/hKX3QzgTgCHATyFXjcQIWSOyJvhawu7C75r9QzjeQBFLZ2tHDSOeQCx5l8p8+/d19fnn+nqOTOZv6reDeDuAee8N/P8AIArhnlfQsjskKf520zX9wRNK/uEScG3KNDGC72PJfhnNf8SPv8c0b9fPG9kMn82diOEzA15LZ1tgdO4fazsEw60eo7TDaSqEJFKsk/+JK8qmn/1cW4WBn9CSK3ktXTeiDLdpp8p+A4I7rHsM4Z5AMkkL/t89D7/3vYOMyL7EELIIEJVNDxJFXStzNH0HKtnoPH+bBB95JlXsNbu4rI9S+bcMck+RvOvUvDt3VelqydlH0LI3BCq0xY5G/zdls5hWJj5/9b//Qp+5b5DYy/42sDvSdnePlW7epot+/kTQuYOVWOZbHgSB2+r7Td9D00n8+/GBd/0a2wEik5Xx1rwteMGTFCuo6tnnPlT9iGEzBtJj5zEwmkz/4WGoOFk/vFiLpngHoaKQHWsK33ZSV6ADf6Dr8md5NXn/FjzD2fM508IIYOwfnnPc33+1uqZ9PPvBMWLuVhJaNxWzzjz92ry+UcziDsTaO/A4E8IqRXrl/c9r8fnb9w+VvYpXszFFIudmsCY3D7iZv4lbjh5mv8gKccTiTP/qWrsRgghw2DlE4U6wd/6/MUp+LqTvNKvEYSKMEwy/iAY
h+yjsQxTXvbp3ScDUmxPJGnsRrcPIWReCEJj9bR2T8DV/JOWzt1Qk66eBZm/lV7Gk/lrnLWLpGWfdjfAz3/iURxbWU9dE6/+5cTwQZm/iOv2GcXIy8HgTwiplTDX7RNp/s4M324QFrZ3sL8KxlnwDTUp1noiKSfPU8fW8LGHnsEXn36p5xogncEPiucm8w/jx+OCsg8hpFasdi5wff6O7OMWfIN8n3+QCf7jnOQFmIzcvSF1C36h2N4+5rpy9k3Pzfwp+xBC5oWUzz9H9vE843ix0g7Qm/nb2cGDGr+NdtyJFTPr8+8U3KSi4cc3NKD/JC/72pNYzIXBnxBSK7HPPyf4W6eP3/DQCcPCSV4267fHu2Mo+KY1/3TB18o02V8g9gbhyj4DM38vcftQ8yeEzA3W55/S/LuJ7AOYHj/dfrJPtPjLpAq+2fYORYvO2FMalTL/5JcEZR9CyNxgff4NcTL/MJF9AJP5uwXfbFDtBoowLL451EGP7OMsMNMpyPyt5l8p83eOU/YhhMwN8Vq4XqKbd7pJYzfAaORdR/PPTpbKav7jCP72FwuQU/C1N6HMkmN5bp9Bybw7sYuN3Qghc0Ns9XQz/yh42glefiMj+2SDv6Z/MYxrMRc7yatH8w/7a/6+06Rn0Kzd1JwA9vYhhMwLoZP5x43dwmzmny74ZtfwtcF+ozu+4J/S/L30r5Eit088ySueGTz4fTxm/oSQeSTp7SMIoqCfFHy9aBst9hLkyz52/3iDf5K1NzJWz6LMPy74Oi6hQbg3CLZ3IITMDRpl0J4nsA7NThDGLR8AE/S6fbp62mDftsF/TG4fd4av6y4t9PlnNP9SmX8FZ9AoYfAnhNRKGCLu5++u5OVOhGo2PHSCsNBCaYO9zfzH18/fPM729ily+2Q1/3KZP90+hJA5JLZ6eumCr7V5AkCr2cCpTlDYuyfR/AMAk2jvIGmff5z5Z90+ab9+Oc3ffczgTwiZE+KCr6QbuzX9JPzsbDVwYr2busbFBvuNYEIF35I+f8Syj9nKwLZumYIvNX9CyLxgLZN+Qwpln+0LPk6c6sTPXU0/DDUupI6/4GseZ2WfonUFEs3fhNYysTyZSDbceKvC4E8IqRWbQZsVq5IMvtlwM38fx6Pgv9DwUrKPeyPYGGPBV7OZf5XePs7M4EHYc8bp9AGGDP4icoOIHBSRUESWM8e+UUQejI4/JiKL0f4ro+eHReQDMs51ywghY8daJhvODN9uoFhwZJ8drUYc/Fu+lwrubpbfHmPmr+r49Uv6/LON3cpEN6+CLXSUDJv5Pw7gegCfc3eKiA/g9wD8mKpeDuCtAOxvutsB7AdwafTv2iHHQAiZYvJn+IZxUzcA2LHgx8cWfA+qSbB1A+xENf8yPv9oG1s9S2Tz9pxxOn2AIYO/qh5S1SdzDr0dwKOq+uXovBdVNRCRPQCWVPVBNf+zdwG4bpgxEEKmm1j2ybR0dlsg7Ggl60rZXwQ2rroBdlKTvHpbOue7fTRr9SzxPvb+MFOyTx9eC0BF5H4R+ZKI/Gy0/zwAR5zzjkT7chGR/SJyQEQOPP/88zUNlRBSJ8bnn54l2wk05fbJC/55TdzGK/toHJizjd2s7NOj+Uf3gsTqWV7zH7cAPnAZRxF5AMA5OYduU9V7+rzutwP4FgAnAXxWRB4GcCLn3ML/RVW9A8AdALC8vFz//zYhZOTEi7k00pn/Qkr2acSPrf8/zJN9xjrDt3gN36JlHLMF3yrtHcad+Q8M/qp6zSZe9wiAP1XVFwBARD4F4I0wdYC9znl7ARzdxOsTQmYEVRPYejX/QbJPn+BfY+b/6JFX8HfH1/uu4VuU+SeafxWrZ/lfCaOkLtnnfgDfKCLbo+LvdwJ4QlWfBbAiIldHLp+bABT9eiCEzAFh5PN3V/LaCDRewhEwVk9LVvbpOrq6W/DNNn8bFXd+/mn80r2HUpp/7xq+0TgyPv9E868i+9jtDAV/EXmXiBwB8CYA94rI/QCg
qi8D+HUADwF4BMCXVPXe6LKbAdwJ4DCApwDcN8wYCCHTjevzt3G8m5V9nODfspl/dK5bU21H7R3M69Yz3nY3QCcIM5p/eoZvsc/fbDdj9Rz3JK+Bsk8/VPVuAHcXHPs9GJknu/8AgCuGeV9CyOyQ+PxRKPtsdzV/3zy2vxJSmX83eRyEWotOvtEN0Qm0p59/x8nyO3ExelBvnwpWzzlx+xBCCIDENZNawL2f7FOi4JvdP0o2ghCdIDQ9ieJFWTI+/6I1fKOnfqXMP3mPccLgTwipFdvYreGle/s0B8o+vUXVthv8a9L8O12zqIyqlvD595/hW8XqOc4lHAEGf0JIzdgZvr7nxYVSY/VMt3ewxAXfPjN8gd5i66hoByE6UTM5V49PtXfIuTEBeSt5DX4/b87cPoQQAiDR/H0vaezWCTSl+W9rNmL5o9VnktfGGDJ/o/mH8U0LKG7sVpT5V9H87Skz1d6BEEIGYTX/pu9k/t0QviP7iAh2LBjpx2b+NrbntXcAatT8uwFUzQ0qmeSVaelc5PPflOY/mRm+DP6EkFqxrhmzVKPx53fCtOwDANsj6cf+IohX9dICzb+m4G9dPe1umOq46b5dJ57hm+/2qab5I3XNuGDwJ4TUii34Wl9/N9Qe2QdIir6tjObfDcYv+5ht4EzySmv+cebfM8nLbKss4F7lRjFKGPwJIbVi1/C1wX49Wqs3G/yt3XMh4/YZd8HXvsdGEGY0/5wZvoP6+Zfo6zlv7R0IIQQAYteM9fWf2jCzdF3NH0gmerWiSV42rhYt7FJ35u/KPtmCb1Hwz/bzr+Lzp+xDCJkrrGvGyj4no+Cf1fyzmX/i9knr6pai/cNig396Ja/sYi4FLZ031dtnMu0dGPwJIbXiFnyBJPg3M5m/1fztTSLM0fxdghpiv6qmpKW05p+cVzzJC9H55SduJecy8yeEzBFhaIJoEvy7AJBq7wAA2xdswTfq7ZPj9nHp1pD5dzI3mkGaf3YMugm3jzjvMU4Y/AkhtRI6Pn+gn+xjgn62n39WWolft4bMfyPzc8L14LtZfjenGA30un3KLeM4g2v4EkLIIGLZx7Oav8n8m36B7NNnMReXOgq+rpUUSLde0JyC7yDNv8xKXlWKw6OEwZ8QUiu2O6aVfdbaVvNPh5837jsd33LB6fFNwCbhxZp/HbJP/msWzfDN2k2z/fzLreSVvmZcDNXPnxBCBmG7Y8ayTyeyemaqoW957Zl4y2vPxP976gUAjtunIMOvo+DbL/NPu32KWjprz3WDYGM3QshIUFWsd4LBJ44JM8M3cfecimSfBT8/2FntWwfIPnUUfNs9wd9s3fYOqhoXhos0fzuHoVI/f7p9CCHD8MkvH8XVv/LZ1JKHkyRURUMkLvAWyT4WK38EBQVfGyNrKfhmg78j3+TdjLI3oJ4ZvvT5E0LGxTMvncQrJztxkJ00YWhkHz9j9SwK/jZgxlbPTPC3BeE6Mv+s28e1Ydph2Kx/sekh1PT4Ys2/QkAXun0IIaOgHbcnmI7gb9s7WNlnrWCSl8VmzUVWz2ZmmcdRki34ulm5fT/b0XOxmV5r2B1TpTV8KfsQQkZB0pWynvYHVUnaO6R7+xTKPlHAtIm9dfXYOGq7fha5gIaht+CL6L2TJSjt+y5mJqO5VGnvUMUZNEoY/AmZM9pOY7JpwFg9e2f4FgV/awJKlnE0z23Qzy7wPkrK+PztKl6LTSs/OZl/mNX8B79nPMmLmT8hZBis3NPuTEvwj1o6Z2b4Fsk+Xpz5pxu72aCfNH4b/VizN8xYj/dc2cdq/lHmH+Ro/hUKvuL8uhgnDP6EzBk26E+d5h/P8N2c22fBT7d/qKe3T77s4xZ8bebfioK/O47Y7VOh4Mv2DoSQkTB9sk/U2ye2eg6QfWzmHwVb4xZKfilk2z+MkqzsYxdjEWeSV+z2ybSeBpJ+/psq+M6S5i8iN4jIQREJRWTZ2d8UkY+KyGMickhEbnWOXRntPywiH5Bx
/9YhZM5pT2XBV3Jkn/6Zf1xgDRW+J/F+K//UUvAtzPxd91Ha7eNq/nax+rghXIn3nNWWzo8DuB7A5zL7bwDQUtXXA7gSwHtE5ILo2O0A9gO4NPp37ZBjIIQ4xJr/lMg+odqWzlnZp0jzN9tkMRdFw5PYQVOn1bN4kpcr+yQ+f3ecdkwiyeKN5TT/GWzvoKqHVPXJvEMAdoiID2AbgA0AJ0RkD4AlVX1QzXS5uwBcN8wYCCFppkn2SXrdAE3PWj3Lyj5O8Bcn8/d7XTajIqv5S07m3wmKM3/byiJeAaxEPLcfw7xo/v8TwBqAZwF8DcCvqepLAM4DcMQ570i0LxcR2S8iB0TkwPPPP1/TUAmZL+LgPwVuH3dlKy+Sbmxjt4Gyj1PwbTiyTyuzwPso6e3tk2j+quZmZoN94vNPrtHoV06VbL7Kql+jZGBXTxF5AMA5OYduU9V7Ci67CkAA4FwApwP4fPQ6eZ9E4f+gqt4B4A4AWF5erme1ZkLmjHZnemSf0Mn8ASP1rHc0fpxH7PaJJ3kp/IaHhpe2etaR+VvZZ6HhYSMIU24fwAT3TnTOtoXBmn+ZgD4p2Wdg8FfVazbxuv8YwKdVtQPgmIh8AcAygM8D2OuctxfA0U28PiGkgI0pkn2sHm4DXLPhYb0TwvekUA+3u12rpyeJ5m8LvkXdPodhIzBjW/Bt8E9bNkPV2OffavYWnkNVCBzNv0TJN3uDGRd1/dD4GoC3iWEHgKsB/JWqPgtgRUSujlw+NwEo+vVACNkE06X5m60NbDZwF0k+QG9L5zBy+3gZzb+W4N8NseB7TkvmtBMnVGeGb057h1jzd5Z/HMRMzvAVkXeJyBEAbwJwr4jcHx36IICdMG6ghwD8V1V9NDp2M4A7ARwG8BSA+4YZAyEkzTQF/17Zxwb/4kCXyD5pzd/PBv+aGrst+F48zqS3j9mGTi//fKtnVN+okM1PKvMfaiUvVb0bwN05+1dh7J551xwAcMUw70sIKWaarJ5J8DeBzWbU/TJ/LxP8gzCE33B9/ibo1lHw3eiGWGh4PY3ZXM0/8fkXWT1dHX/we7prBowTLuNIyJwxjW4fm9SWkX16rJ5qpKBs5l9XwbfZ8Hoas7maf7cn83fdPpHPv1LmP4OyDyFkulDVpKVzHZ3PKpJd0zaWfQqWcAScls5RbA/CMGX1tDeBspn/4WMrOPLyyVLntoMQLd+LZamsE8fIPv0y/7TmX2aKr9s2epww+BMyR7gBf5oy/1jz98vIPmYba/5B2udv9f+ymf9P/8GX8cufOlTq3E43X/N3+w1lff4pzR+6Cc3f/l2lhjgyGPwJmQM6QYhf/D9P4Osvn4r3TZXmn2nN0OxjgE8Wc9H4NdyCbyNy/pQt+L60toGX1jZKnbsRpN0+Waunqjr9/PNbOqcneQ1+z6n1+RNCpp+vPLeKD//Z0zhzVyveN11unyj4eyVkn5yWzr4nKW3c9yQVdPux2u6WXs/YFnyDjEvJLUJbt08rZzEXjQu+0XXz6vYhhEwHq1Gb5BdX2/G+aQj+WZ9/GdlHejT/KPNvJMG/IeUyf1XFarsbfz6DsAVfLzM5TVKyTybzT63ktXmf/7xM8iKEjJGV9Q4A4IXVRN7YmCbZp8fn3z/0NDxxVvKymr+5xvcEjYaUmuTV7oYIQi0d/GOfv18s+3SCrObvuH0ymn+ZIq4Xy1mlhjgymPkTMgfY4PbClGX+YTbzLzHJC0Aqs89O8vIkyvxLBP+VdfO5rK6XC/7tqOCLrh13evyhooTbR3puGv2g24cQsmlOrFvZx2T+vifT4faJ5RPzvIzPHzCOHzfz9z0vpfl7XrKyVj/sTfFUJ4gLtf3YiGf45mf+1ucvkj/fIDsm+vwJIbViZZ8X10zmv7StORVun6zmX2aGrz3f7efvZdw+vielVvJac+SetY3Bn4ct+NrxZRdXD1Wx3gmw6DfgeznLOKq5cVXR8WdyGUdCyHSwmsn8dy36UyL7WKuneW6D
6sIgzV8k3dI50vmBKPMvWfBdceSeMrp/3N4hDv697R1ePtnB6dubcabe29JZKvXmn8mVvAgh04ENcjYQ7Vr0p2IN3x6rZxRU/QGavyvrxIu5uFbPkgXfVOZfIvjHBd9Mvx1X9jl+agOnbV+If4kEjpzUq/lT9iGE1Eg2q11abE5J5m+2NrtdKCn7NDxJNXZzl3FsVCj4up/LSomi70bPDN9seweT+Z+2vRn/Eslq/oKkq0OVgi8zf0JIZazmbzHBfxo0/81ZPd01c4NQ0Wgkmr/t8Fk1+JfJ/DcC4/NPZvia/W5L51dObuB0N/PPaP6prp4lor+9qZU5d5Qw+BMyB2Sz2qVtRvPXki0Q6iLIyD5+SatntuDrav6ebC74D9L8290AnUCxfaHhFHzTkoyq4pWTHewu0vyzPv+BI6zW/nmUMPgTMgdkg/+uxaZZb7ZkC4S6sPOfbGDbjOyT1fx9r3zwX6sQ/J9fMU6ps3a1cqyeNssHXjllCr55bh8zwzfR/EtN8opOaVD2IYRUJU/zBybf1tlm75Ip+Jaxetqhh6Gm+vl7NviXdPvYmDpootcxG/yXWvEvlGzB98R6B0GoOG3bQryvR/OX3nUA+sH2DoSQTbOy3klJKbsWzeT9dmeyun9vbx9r9Rzk9knqBd1QI53fXNuIMuuyss8ZO1vx434cO7EOADhr16Lj9kln8LY76Gnbm5DohhSEabePVHb72L+ZwZ8QUgFVxcp6F+fsXgRgJJMdLdN3ZtKOn6LePn4Zn79b8PUk7n1ji79lZZ/TtjWx2PQGFnzdzD87ycvrCf4LZiw96woovIqZPzV/QsimaHdDdEPFnt3bAAAt30PLn7bgb2WfkjN8XaunamT1dDL/CgXfHS0fO1s+VgZm/m14Arx6Rytnkpc5xwb/07cbWS3bWjrr8y/T1pM+f0LIpjgR2TzPjTL/lu/FfWcmbffMruFbpbFb7PYJFA0vWVTd98pn/qvtLnYtmuA/KPN/7sQ6ztzVQsMTp+CLaGse2BnUp0XBP5v5b0bzt/dBNnYjhFTCFjL3nGYz/wZaUfCf9CzfojV87c2pCE8kdgolmn/1gu/qehc7Wz52tPxSBd+zdi2mxpnty//yybTs4ze8nq6eVTX/2E7K4E8IqYK1ecaZf3OaZB+zzco+/oDmN+4yjUHUL8ddwL2K1dPKPgMLvittnL1kisN+Ix3048w/kn12b8vP/E1vn826fQafO0oY/AmZcWxQS2n+Ua/5Sbd1zhZ8F8rKPj0tnTOZf9l+/m2T+ZcK/ifWcabN/L389g4vr21gV8tPCtcZt49mNP8qPn+6fQghlbCtHazbZ8H3Ytln8pp/2ufvl5R9rNtHVZNlHJ3ePmUKvqqKNRv8F/sH/04Q4sW1DZwVrYFctJLXS2sb2B3p/UCB5o9kZm+VZRxnSvYRkf8gIn8lIo+KyN0icppz7FYROSwiT4rI9zr7rxSRx6JjH5BxVzkImTOs7LN7WxM7Wz5afsMp+E5a8zdbm7WXlX1EBKEms2ftAi72cZmC76lOgFCBnYtG8+9X8LWze89eWkyNL7vK1ktrpq+PJTuO0LZ0rqT5m22VNtCjYNi3+wyAK1T1GwH8NYBbAUBELgNwI4DLAVwL4EMi0oiuuR3AfgCXRv+uHXIMhGxpbPDf2fKxa9FPWT0nXfDdvOxj1vC1un8q849uBIMKvrbAu6PlY1fL79vV85jT2sGML7+3z6lOEDt97P605m+CebVlHCczw3eoNXxV9Y+cp38O4Aeix+8E8DFVbQN4WkQOA7hKRP4WwJKqPggAInIXgOsA3DfMOPrxzz/6EL764sm6Xp6QiWMdKDsX3eBvgtcvf+oQPvjHhyc2tpPR6lmx1TMaV7OE7PPwV1/G9/3m581zL13w9T3BMy+dxPf8+p8WvoZda3dnq4EdLdPoruh8O86zlmzwz1o9k3NtsdeMxcOfPvl8/LrPvHwSbzj/NIhnrxsc0ONaxiwF/wz/DMAf
RI/Pg7kZWI5E+zrR4+z+XERkP8yvBOzbt29Tg9r3qh0D9UVCZp1LztqFZsPDT7ztUixta+KcpUX8k2+7AMdW1ic9NLz5klfjdXuWAACvP2839r/lIlx1wav6XvPDb3oN7nv8WQDAZefuxjWvOwuv3tHCzW+9GBeduRM3XHl+HNz78c37TsebLz4Dr5zbwVeOraaKs1m+8++diW84x4zzWy96Nd7zlotw2bnm+ev2LOEHl/ditd3FD12VxKIf+fYL8Sd/fSx+funZO/EPXn8udrV8/MzbX4u3X3bOwDG+/rzdeM9bLsLyBacPPHeUyKCWryLyAIC8v+A2Vb0nOuc2AMsArldVFZEPAnhQVX8vOv5hAJ8C8DUAv6Kq10T7vwPAz6rq9w8a6PLysh44cKD8X0YIIQQi8rCqLmf3D8z8baDu88LvBvAPAXy3JneSIwDOd07bC+BotH9vzn5CCCFjZFi3z7UAfg7AO1TVFdY/CeBGEWmJyIUwhd0vquqzAFZE5OrI5XMTgHuGGQMhhJDqDKv5/xaAFoDPRFXxP1fVH1PVgyLycQBPAOgCuEVVreH4ZgAfAbANptBbW7GXEEJIPsO6fS7pc+z9AN6fs/8AgCuGeV9CCCHDQRsMIYRsQRj8CSFkC8LgTwghWxAGf0II2YIMnOQ1LYjI8wC+usnLzwDwwgiHM+vw80jgZ5GGn0fCvHwWr1HVM7M7Zyb4D4OIHMib4bZV4eeRwM8iDT+PhHn/LCj7EELIFoTBnxBCtiBbJfjfMekBTBn8PBL4WaTh55Ew15/FltD8CSGEpNkqmT8hhBAHBn9CCNmCzHXwF5FrowXkD4vIz096PJNARP5WRB4TkUdE5EC071Ui8hkR+Uq0He8SQmNERH5HRI6JyOPOvsK/X0Rujb4vT4rI905m1PVQ8Fm8V0S+Hn0/HhGR73OOze1nAQAicr6I/LGIHBKRgyLyk9H+LfH9mNvgHy0Y/0EAfx/AZQB+KFpYfivyXar6Bsez/PMAPquqlwL4bPR8XvkIgGsz+3L//uj7cSOAy6NrPhR9j+aFj6D3swCA34i+H29Q1U8BW+KzAEy7+X+lqq8DcDWAW6K/e0t8P+Y2+AO4CsBhVf0bVd0A8DGYheWJ+Rw+Gj3+KIDrJjeUelHVzwF4KbO76O9/J4CPqWpbVZ8GcBjmezQXFHwWRcz1ZwEAqvqsqn4perwC4BDMmuJb4vsxz8H/PADPOM/7LhY/xyiAPxKRh0Vkf7Tv7GhVNUTbsyY2uslQ9Pdv1e/Mj4vIo5EsZCWOLfVZiMgFAL4ZwF9gi3w/5jn4S86+rehrfbOqvhFG/rpFRN4y6QFNMVvxO3M7gIsBvAHAswD+Y7R/y3wWIrITwCcA/JSqnuh3as6+mf1M5jn4Fy0iv6VQ1aPR9hiAu2F+pj4nInsAINoem9wIJ0LR37/lvjOq+pyqBqoaAvhtJDLGlvgsRKQJE/j/m6r+r2j3lvh+zHPwfwjApSJyoYgswBRqPjnhMY0VEdkhIrvsYwBvB/A4zOfw7ui0dwO4ZzIjnBhFf/8nAdwoIi0RuRDApQC+OIHxjQ0b5CLeBfP9ALbAZyFm4fEPAzikqr/uHNoS349hF3CfWlS1KyI/DuB+AA0Av6OqByc8rHFzNoC7zXccPoD/rqqfFpGHAHxcRH4EwNcA3DDBMdaKiPw+gLcCOENEjgD4twD+HXL+flU9KCIfB/AEjBPkFlUNJjLwGij4LN4qIm+AkS/+FsB7gPn/LCLeDOCHATwmIo9E+/41tsj3g+0dCCFkCzLPsg8hhJACGPwJIWQLwuBPCCFbEAZ/QgjZgjD4E0LIFoTBnxBCtiAM/oQQsgX5/4HY1bCAgUDBAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "# Training: keep playing episodes in 'train' mode until the mean reward of\n",
    "# the most recent (up to 10) episodes exceeds -110, the reward_threshold\n",
    "# logged for MountainCar-v0 in the environment cell's output.\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "episode_ids = itertools.count()\n",
    "solved = False\n",
    "while not solved:\n",
    "    episode = next(episode_ids)\n",
    "    # The unwrapped env is used and the step cap is enforced by play_episode.\n",
    "    # NOTE(review): presumably so hitting the cap is not reported to the\n",
    "    # agent as `done` -- confirm against the agent's learning code.\n",
    "    episode_reward, elapsed_steps = play_episode(\n",
    "            env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug(\n",
    "            'train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    solved = np.mean(episode_rewards[-10:]) > -110\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "# Evaluation: play 100 episodes with play_episode's defaults (mode=None,\n",
    "# no step cap passed, no rendering) and report mean and standard deviation\n",
    "# of the episode rewards.\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug(\n",
    "            'test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)\n",
    "logging.info('average episode reward = %.2f ± %.2f', mean_reward, std_reward)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Close the environment once the training and test runs above have finished.\n",
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
